# Import the needed libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from copy import copy
from scipy import stats
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
# Read the data and view it
# NOTE(review): nothing in this section actually loads the data; stocks_df is
# assumed to have been created before this point (e.g. by a pd.read_csv call)
# -- confirm against the rest of the notebook.
stocks_df
# Sort the rows by the Date column
# sort_values returns a new DataFrame and the result is not assigned, so
# stocks_df itself keeps its original row order.
stocks_df.sort_values(['Date'])
# Names of the price columns (everything after the first column, which is
# assumed to be Date)
stocks_df.columns[1:]
# Basic Data Analysis ------------------------------
# What was the average price of each series?
# numeric_only=True keeps non-numeric columns such as Date out of the
# reduction instead of letting them break (or pollute) the result.
display(stocks_df.mean(numeric_only=True))
# Which stock had the lowest standard deviation?
display(stocks_df.std(numeric_only=True))
# What is the maximum price of AMZN?
# Series.max() skips missing values, unlike the builtin max() whose result
# depends on where a NaN happens to sit.
stocks_df['AMZN'].max()
# How many null values does each column have?
# info() prints the non-null count per column; isna().sum() gives the null
# count itself.
stocks_df.info()
display(stocks_df.isna().sum())
# Plot all stock prices versus date using a function
def show_plot(data, title):
    """Draw a static line chart of ``data`` with 'Date' on the x-axis.

    Args:
        data: Object whose ``plot`` method is called; it must expose a
            'Date' column to use as the x-axis.
        title: Text placed above the chart.
    """
    plot_options = {
        'x': 'Date',
        'figsize': (15, 6),
        'linewidth': 3,
        'title': title,
    }
    data.plot(**plot_options)
    # Called with no arguments, so this toggles the grid of the current axes
    plt.grid()
    plt.show()
# Static chart of the raw prices, drawn with the show_plot helper defined above
show_plot(stocks_df, 'Stock Prices over time (not normalized)')
# Plot the normalized stock prices over time
# Normalize the data
def normalize(data):
    """Return a copy of ``data`` with every price column scaled to start at 1.

    The first column is left untouched (it is assumed to hold the dates);
    every other column is divided by its own value in the first row.

    Args:
        data: DataFrame whose first column is skipped and whose remaining
            columns are numeric.

    Returns:
        A new DataFrame of the same shape; ``data`` itself is not modified.
    """
    normalized = data.copy()
    if data.empty:
        # No first row to divide by; hand back the untouched copy
        return normalized
    for column in normalized.columns[1:]:
        prices = data[column]
        # .iloc[0] is the first row by position, so this also works when the
        # index does not contain the label 0 (e.g. after filtering or sorting)
        normalized[column] = prices / prices.iloc[0]
    return normalized
# Normalize every price column of stocks_df with normalize()
stocks_df_normalized = normalize(stocks_df)
# Static chart of the normalized prices
show_plot(stocks_df_normalized, 'Stock Prices over time (Normalized)')
def interactive_plot(df, title):
    """Show an interactive Plotly chart with one trace per price column.

    Args:
        df: DataFrame with a 'Date' column used as the x-axis; the first
            column is skipped and every other column becomes its own trace.
        title: Title of the figure.
    """
    # Start from an empty figure that only carries the title. Handing the whole
    # frame to px.line would treat it as wide-form data (Date mixed in with the
    # prices) and the loop below would then add every series a second time.
    fig = px.line(title=title)
    for column in df.columns[1:]:
        # One scatter trace per stock, named after its column
        fig.add_scatter(x=df['Date'], y=df[column], name=column)
    fig.show()
# Interactive versions of both charts: raw prices first, then normalized prices
interactive_plot(stocks_df, 'Raw Stock Prices (not normalized)')
interactive_plot(stocks_df_normalized, 'Prices (Normalized)')
# Calculate the daily returns of a single series only: the VOO column
df = stocks_df['VOO']
# Percentage change versus the previous row:
#   (price - previous price) / previous price * 100
# diff() and shift(1) pair each row with the one before it by position, so no
# element-by-element loop or integer label lookup is needed.
daily_returns = df.diff() / df.shift(1) * 100
# The first row has no previous price; report its return as 0 instead of NaN
daily_returns.iloc[:1] = 0
display(daily_returns)
# Repeat the same calculation for the AMZN column
df = stocks_df['AMZN']
# Percentage change versus the previous row, computed for the whole column at
# once: (price - previous price) / previous price * 100
daily_returns = df.diff() / df.shift(1) * 100
# The first row has no previous price; report its return as 0 instead of NaN
daily_returns.iloc[:1] = 0
print(daily_returns)
# The same calculation has to be applied to every price column
def daily_returns(data):
    """Return the row-over-row percentage change of every price column.

    The first column is copied through unchanged (it is assumed to hold the
    dates). For every other column each value becomes
    ``(price - previous price) / previous price * 100`` and the first row,
    which has no previous price, is set to 0.

    Args:
        data: DataFrame whose first column is skipped and whose remaining
            columns are numeric.

    Returns:
        A new DataFrame of the same shape; ``data`` itself is not modified.
    """
    returns = data.copy()
    for column in data.columns[1:]:
        prices = data[column]
        # Whole-column arithmetic replaces the chained frame[col][row] = ...
        # writes, which only modify a temporary under pandas Copy-on-Write and
        # which looked rows up by integer label rather than by position.
        change = prices.diff() / prices.shift(1) * 100
        # Slice assignment so an empty column is simply left empty
        change.iloc[:1] = 0
        returns[column] = change
    return returns
# Compute the daily returns for every price column of stocks_df
df_daily_return = daily_returns(stocks_df)
# Bare expression: in a notebook this renders the frame when it is the last
# statement of a cell; it has no other effect
df_daily_return
# Make both static and interactive plots: write the functions for both types of plots
# A function for static plot built using matplotlib
def show_plot(df, title):
    """Render a static line chart of ``df`` plotted against its 'Date' column.

    Args:
        df: Object whose ``plot`` method is called; it must expose a 'Date'
            column to use as the x-axis.
        title: Text placed above the chart.
    """
    line_style = dict(linewidth=3, figsize=(15, 6))
    df.plot(x='Date', title=title, **line_style)
    # Called with no arguments, so this toggles the grid of the current axes
    plt.grid()
    plt.show()
# Make the static plot of the daily returns computed above
show_plot(df_daily_return, 'Stock Daily Returns')
# Interactive plot of the daily returns: a function
def interactive_plot(df, title):
    """Show an interactive Plotly chart with one trace per data column.

    Args:
        df: DataFrame with a 'Date' column used as the x-axis; the first
            column is skipped and every other column becomes its own trace.
        title: Title of the figure.
    """
    # Create an empty, titled figure. Passing the frame itself to px.line
    # would have Plotly Express read it as wide-form data (Date together with
    # the numeric columns) and the loop below would then draw every series
    # a second time.
    fig = px.line(title=title)
    # Add one line per column, labelled with the column name
    for column in df.columns[1:]:
        fig.add_scatter(x=df['Date'], y=df[column], name=column)
    fig.show()
# Make the interactive plot of the daily returns
interactive_plot(df_daily_return, 'Daily Returns')